Sys.setlocale("LC_ALL", "English")
library(cosinor)
library(cosinor2)
library(tidyverse)
library(dynlm)
library(lmtest)
library(sandwich)
library(zoo)
library(forcats)
library(readxl)
# Eight-colour palette as hex codes, in this order:
# black, orange, sky blue, bluish green, yellow, blue, vermillion,
# reddish purple.
cbPalette <- c(
  "#000000", "#E69F00", "#56B4E9", "#009E73",
  "#F0E442", "#0072B2", "#D55E00", "#CC79A7"
)
# ---- Working directory and input tables ----
# Relative file names used below are resolved against this folder.
setwd("C:/Users/wxl00/OneDrive/Lulu-backup/Publications/Song Lyrics Project/Cultural Analytics/Data and Code Submitted")
# setwd("C:/Users/wangx/OneDrive - Dickinson College/Song Lyrics Project/How-Emotions-Are-Coded")

# df: missing region labels are replaced by "unknown".
df <- read_csv("ThreeEmotionsCH_Year2_IntensityWithSign_avg.csv")
df$region_coarse <- replace(df$region_coarse, is.na(df$region_coarse), "unknown")
df$region <- replace(df$region, is.na(df$region), "unknown")

# dF: rows from 1967 onwards with IntensityWithSign inside [-15, 15].
dF <- read_csv("ThreeEmotionsCH_sentiment scores.csv") %>%
  filter(Year2 >= 1967, IntensityWithSign <= 15, IntensityWithSign >= -15)
dF$region_coarse <- replace(dF$region_coarse, is.na(dF$region_coarse), "unknown")

# Df: missing region labels are replaced by "unknown".
Df <- read_csv("Lyrics_split_Year2_IntensityWithSign.csv")
Df$region_coarse <- replace(Df$region_coarse, is.na(Df$region_coarse), "unknown")
Df$region <- replace(Df$region, is.na(Df$region), "unknown")

# DF: rows from 1967 onwards.
DF <- read_csv("Lyric_split_sentiment scores.csv") %>%
  filter(Year2 >= 1967)
DF$region_coarse <- replace(DF$region_coarse, is.na(DF$region_coarse), "unknown")

# Keep only the ids that occur in both dF and DF.
dF <- semi_join(dF, DF, by = "id")
DF <- semi_join(DF, dF, by = "id")
# 15. Manual check for evaluating ChatGPT-assisted vs. Lexicon-only methods ----
# Each score is turned into a polarity label ("N" when the score is below
# zero, "P" otherwise) and compared with the Manual_Check column.
# IntensityWithSign1 feeds the `gpt` label, IntensityWithSign2 the `lexicon`
# label.
tp <- read_xlsx("manual check.xlsx") %>%
  mutate(
    gpt = ifelse(IntensityWithSign1 < 0, "N", "P"),
    lexicon = ifelse(IntensityWithSign2 < 0, "N", "P"),
    # Agreement with the manual label.
    gpt_r = Manual_Check == gpt,
    lexicon_r = Manual_Check == lexicon,
    # Confusion-matrix cell membership, ChatGPT-assisted labels.
    gpt_T_P = Manual_Check == "P" & gpt == "P",
    gpt_F_P = Manual_Check == "N" & gpt == "P",
    gpt_T_N = Manual_Check == "N" & gpt == "N",
    gpt_F_N = Manual_Check == "P" & gpt == "N",
    # Confusion-matrix cell membership, lexicon-only labels.
    lexicon_T_P = Manual_Check == "P" & lexicon == "P",
    lexicon_F_P = Manual_Check == "N" & lexicon == "P",
    lexicon_T_N = Manual_Check == "N" & lexicon == "N",
    lexicon_F_N = Manual_Check == "P" & lexicon == "N"
  )

# Number of rows on which each method agrees with the manual label.
sum(tp[["gpt_r"]])
sum(tp[["lexicon_r"]])

# Confusion-matrix counts.
gptTP <- with(tp, sum(gpt_T_P))
gptFP <- with(tp, sum(gpt_F_P))
gptTN <- with(tp, sum(gpt_T_N))
gptFN <- with(tp, sum(gpt_F_N))
lexiconTP <- with(tp, sum(lexicon_T_P))
lexiconFP <- with(tp, sum(lexicon_F_P))
lexiconTN <- with(tp, sum(lexicon_T_N))
lexiconFN <- with(tp, sum(lexicon_F_N))

# Per-class precision, recall and F1 plus their unweighted two-class averages,
# ChatGPT-assisted labels.
precision_gptP <- gptTP / (gptTP + gptFP)
precision_gptN <- gptTN / (gptTN + gptFN)
precision_gpt_avg <- (precision_gptP + precision_gptN) / 2
recall_gptP <- gptTP / (gptTP + gptFN)
recall_gptN <- gptTN / (gptTN + gptFP)
recall_gpt_avg <- (recall_gptP + recall_gptN) / 2
f1_gptP <- 2 * (precision_gptP * recall_gptP) / (precision_gptP + recall_gptP)
f1_gptN <- 2 * (precision_gptN * recall_gptN) / (precision_gptN + recall_gptN)
f1_gpt_avg <- (f1_gptP + f1_gptN) / 2

# Same metrics, lexicon-only labels.
precision_lexiconP <- lexiconTP / (lexiconTP + lexiconFP)
precision_lexiconN <- lexiconTN / (lexiconTN + lexiconFN)
precision_lexicon_avg <- (precision_lexiconP + precision_lexiconN) / 2
recall_lexiconP <- lexiconTP / (lexiconTP + lexiconFN)
recall_lexiconN <- lexiconTN / (lexiconTN + lexiconFP)
recall_lexicon_avg <- (recall_lexiconP + recall_lexiconN) / 2
f1_lexiconP <- 2 * (precision_lexiconP * recall_lexiconP) / (precision_lexiconP + recall_lexiconP)
f1_lexiconN <- 2 * (precision_lexiconN * recall_lexiconN) / (precision_lexiconN + recall_lexiconN)
f1_lexicon_avg <- (f1_lexiconP + f1_lexiconN) / 2

# Report: ChatGPT-assisted metrics first, then lexicon-only.
precision_gptP
precision_gptN
precision_gpt_avg
recall_gptP
recall_gptN
recall_gpt_avg
f1_gptP
f1_gptN
f1_gpt_avg
precision_lexiconP
precision_lexiconN
precision_lexicon_avg
recall_lexiconP
recall_lexiconN
recall_lexicon_avg
f1_lexiconP
f1_lexiconN
f1_lexicon_avg

# Cross-tabulation of per-row agreement for the two methods.
table("ChatGPT-assisted" = tp$gpt_r, "Lexicon-only" = tp$lexicon_r)
## 1.2 Three GPT words----
# Yearly series read from the spreadsheet; the code below uses its columns
# year, cpi, gdp and misery.
tw <- read_xlsx("Taiwan misery.xlsx")

# Yearly sentiment: count-weighted mean of IntensityWithSign_avg per Year2,
# restricted to 1967 onwards. df1 must exist before the join below; joining
# first fails with "object 'df1' not found" in a fresh session.
df1 <- df %>%
  group_by(Year2) %>%
  summarise(
    IntensityWithSign_avg = weighted.mean(IntensityWithSign_avg, count),
    count = sum(count)
  ) %>%
  filter(Year2 >= 1967) %>%
  select(Year2, IntensityWithSign_avg)

# Attach the yearly sentiment to the Taiwan series (all rows of tw are kept)
# and standardize each series so they share one axis.
tp <- tw %>% left_join(df1, by = c("year" = "Year2"))
tp$cpi_s <- scale(tp$cpi)
tp$gdp_s <- scale(tp$gdp)
tp$misery_s <- scale(tp$misery)
tp$sentiment <- scale(tp$IntensityWithSign_avg)

# Preview: standardized GDP (red) and standardized sentiment (blue) by year.
tp %>% ggplot(aes(x = year)) +
  geom_point(aes(y = gdp_s), color = "red") +
  geom_smooth(aes(y = gdp_s), color = "red", span = 0.5) +
  geom_point(aes(y = sentiment), color = "blue") +
  geom_smooth(aes(y = sentiment), color = "blue", span = 0.5) +
  labs(x = "Year", y = "Standardized values") +
  guides(color = guide_legend(title = NULL)) +
  scale_x_continuous(breaks = seq(1960, 2020, by = 10))
# Fig S1: standardized GDP (red) against standardized sentiment (blue) by
# year, each drawn as points plus a smoother with span 0.5.
p <- ggplot(data = tp, mapping = aes(x = year)) +
  geom_point(aes(y = gdp_s), colour = "red") +
  geom_smooth(aes(y = gdp_s), colour = "red", span = 0.5) +
  geom_point(aes(y = sentiment), colour = "blue") +
  geom_smooth(aes(y = sentiment), colour = "blue", span = 0.5) +
  labs(x = "Year", y = "Standardized values") +
  guides(color = guide_legend(title = NULL)) +
  scale_x_continuous(breaks = seq(1960, 2020, by = 10))
p
ggsave(
  filename = "C:/Users/wxl00/OneDrive/Lulu-backup/Publications/Song Lyrics Project/Cultural Analytics/Fig S1.tiff",
  plot = p,
  width = 8,
  height = 6,
  units = "in",
  dpi = 300,
  compression = "lzw"
)
### 13.1.1 spectral analysis----
# Spectrum of the standardized GDP series. plot = FALSE suppresses the base
# graphics output; the literal FALSE is used because `F` is an ordinary
# variable that can be reassigned.
gdp_spec <- stats::spectrum(tp$gdp_s, log = "no", plot = FALSE)
spx <- gdp_spec$freq
spy <- gdp_spec$spec * 2
sp <- data.frame(spx, spy)

# Periodogram input: a single-row wide table with one column per year
# (named "X<year>") holding gdp_s.
tp1 <- tp %>%
  select(year, gdp_s) %>%
  mutate(X = "X") %>%
  unite(year, X, year, sep = "") %>%
  spread(year, gdp_s)
# Time index 1..number of year columns; seq_len() stays empty for zero columns.
year <- seq_len(ncol(tp1))
p <- periodogram(data = tp1, time = year)
p

# Fig S2: spectral density against frequency, built once and then saved.
p <- sp %>% ggplot(aes(x = spx, y = spy)) +
  geom_line(size = 1) +
  labs(x = "Frequency (periods/year)", y = "Spectral density")
p
ggsave("C:/Users/wxl00/OneDrive/Lulu-backup/Publications/Song Lyrics Project/Cultural Analytics/Fig S2.tiff", plot = p, width = 8, height = 6, dpi = 300, units = "in", compression = "lzw")
# Fig S3: standardized misery series (red) against standardized sentiment
# (blue) by year, each drawn as points plus a smoother with span 0.5.
# The figure is defined once, so the displayed and the saved version cannot
# drift apart.
p <- tp %>% ggplot(aes(x = year)) +
  geom_point(aes(y = misery_s), color = "red") +
  geom_smooth(aes(y = misery_s), color = "red", span = 0.5) +
  geom_point(aes(y = sentiment), color = "blue") +
  geom_smooth(aes(y = sentiment), color = "blue", span = 0.5) +
  labs(x = "Year", y = "Standardized values") +
  guides(color = guide_legend(title = NULL)) +
  scale_x_continuous(breaks = seq(1960, 2020, by = 10))
p
ggsave("C:/Users/wxl00/OneDrive/Lulu-backup/Publications/Song Lyrics Project/Cultural Analytics/Fig S3.tiff", plot = p, width = 8, height = 6, dpi = 300, units = "in", compression = "lzw")
# Spectral analysis of the standardized misery series, restricted to the rows
# where misery_s is not missing.
tp2 <- tp[!is.na(tp$misery_s),]
# plot = FALSE suppresses the base graphics output; the literal FALSE is used
# because `F` is an ordinary variable that can be reassigned.
misery_spec <- stats::spectrum(tp2$misery_s, log = "no", plot = FALSE)
spx <- misery_spec$freq
spy <- misery_spec$spec * 2
sp <- data.frame(spx, spy)

# Fig S4: spectral density against frequency.
p <- sp %>% ggplot(aes(x = spx, y = spy)) +
  geom_line(size = 1) +
  labs(x = "Frequency (periods/year)", y = "Spectral density")
p
ggsave("C:/Users/wxl00/OneDrive/Lulu-backup/Publications/Song Lyrics Project/Cultural Analytics/Fig S4.tiff", plot = p, width = 8, height = 6, dpi = 300, units = "in", compression = "lzw")

# Periodogram input: a single-row wide table with one column per year
# (named "X<year>") holding misery_s. tp3 has to be built before it is used;
# calling periodogram() on it earlier fails with "object 'tp3' not found" in a
# fresh session.
tp3 <- tp2 %>%
  select(year, misery_s) %>%
  mutate(X = "X") %>%
  unite(year, X, year, sep = "") %>%
  spread(year, misery_s)
# Time index 1..number of year columns; seq_len() stays empty for zero columns.
year <- seq_len(ncol(tp3))

# Fig S5: periodogram of the misery series.
p <- periodogram(data = tp3, time = year)
p
ggsave("C:/Users/wxl00/OneDrive/Lulu-backup/Publications/Song Lyrics Project/Cultural Analytics/Fig S5.tiff", plot = p, width = 8, height = 6, dpi = 300, units = "in", compression = "lzw")
